#!/home/chunwei/chunenv/bin/python
# -*- coding: utf-8 -*-
import sys
import pandas as pd
import numpy as np
import datetime as d
import argparse
'''
Initialize the per-user features.

Adds the username column.
'''
def parse_args(argv=None):
    """Parse the script's command-line arguments.

    Args:
        argv: Optional list of argument strings, without the program name.
            When omitted, ``sys.argv[1:]`` is used.

    Returns:
        A dict with the keys ``'log_path'`` and ``'output'``.

    Raises:
        SystemExit: With status 0 after printing the help text when no
            arguments are supplied; with argparse's usual error status when
            the arguments are invalid.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('log_path', help='path of the input log CSV file')
    parser.add_argument('output', help='path of the CSV file to write')

    if argv is None:
        argv = sys.argv[1:]

    if not argv:
        # Show the full help instead of argparse's terse "too few arguments"
        # error, without mutating the process-wide sys.argv to do so.
        parser.print_help()
        parser.exit()

    return vars(parser.parse_args(argv))

# Read the input log path and the output path from the command line.
args = parse_args()

# Timestamp layout used both to parse the log's ``time`` column and to
# render the dates written to the output file.
time_format = "%Y-%m-%dT%H:%M:%S"

# Reserved pseudo-event that collects every date of a user, whatever the event.
overall_key = 'overall'

log_path, output = args['log_path'], args['output']

# The log is read as CSV; the ``username``, ``event`` and ``time`` columns
# are the ones used below.
log = pd.read_csv(log_path)
log['date'] = log.time.map(lambda x: d.datetime.strptime(x, time_format))

# username -> {event name -> [dates]}, plus the ``overall`` entry per user.
user_event_date = {}
for username, event, date in log[["username", "event", "date"]].values:
    per_event = user_event_date.setdefault(username, {overall_key: []})
    per_event[overall_key].append(date)
    # An event literally named like the reserved key shares the aggregate
    # list; appending again would record each of its dates twice.
    if event != overall_key:
        per_event.setdefault(event, []).append(date)

# ``unique()`` keeps first-appearance order, so row and column order are
# reproducible from run to run (iterating a set of strings is not).
usernames = log.username.unique()
event_names = [event for event in log.event.unique() if event != overall_key]
event_names.append(overall_key)

user_fea = pd.DataFrame()
user_fea['username'] = usernames

# One column per event: the user's dates for that event, sorted ascending and
# joined with single spaces (empty string when the user never had the event).
for event in event_names:
    user_fea['event_%s_date' % event] = [
        ' '.join(date.strftime(time_format)
                 for date in sorted(user_event_date[username].get(event, [])))
        for username in usernames
    ]

user_fea.to_csv(output, index=False)
